##regional powers##
rm(list = ls())
library(tidyverse)
library(dplyr)
library(rlang)
library(purrr)
library(readxl)
library(haven)
library(readxl)

## Working directories
#set your own directory please!
#setwd("~/Dropbox (Harvard University)/Gov_2001_Rep/R Scripts")
#setwd("~/Dropbox (Harvard University)/Gov 2001 Rep Paper/R Scripts")

############################
######### NMC Data ######### 
############################
# Load the NMC 5.0 table from the project-relative Datasets folder.
NMC_5_0 <- readr::read_csv(file = "Datasets/power index/NMC_5_0.csv")

## Region membership and year range
# Each vector lists the state abbreviations assigned to one region. The object
# names double as the region labels: `reg_vector` stores those names as
# strings, in the order in which regions take precedence.
NorAm <- c(
  "USA", "CAN", "BHM", "CUB", "HAI", "DOM", "JAM", "TRI",
  "BAR", "DMA", "GRN", "SLU", "SVG", "AAB", "SKN", "MEX"
)
SouAm <- c(
  "COL", "VEN", "GUY", "SUR", "ECU", "PER", "BRA", "BOL",
  "PAR", "CHL", "ARG", "URU"
)
CenAm <- c("BLZ", "GUA", "HON", "SAL", "NIC", "COS", "PAN")
Eu <- c(
  "UKG", "IRE", "NTH", "BEL", "LUX", "FRN", "MNC", "LIE",
  "SWZ", "SPN", "AND", "POR", "HAN", "BAV", "GMY", "GFR",
  "GDR", "BAD", "SAX", "WRT", "HSE", "HSG", "MEC", "POL",
  "AUH", "AUS", "HUN", "CZE", "CZR", "SLO", "ITA", "PAP",
  "SIC", "SNM", "MOD", "PMA", "TUS", "MLT", "ALB", "MNG",
  "MAC", "CRO", "YUG", "BOS", "KOS", "SLV", "GRC", "CYP",
  "BUL", "MLD", "ROM", "RUS", "EST", "LAT", "LIT", "UKR",
  "BLR", "ARM", "GRG", "FIN", "SWD", "NOR", "DEN", "ICE"
)
Afr <- c(
  "CAP", "STP", "GNB", "EQG", "GAM", "MLI", "SEN", "BEN",
  "MAA", "NIR", "CDI", "GUI", "BFO", "LBR", "SIE", "GHA",
  "TOG", "CAO", "NIG", "GAB", "CEN", "CHA", "CON", "DRC",
  "UGA", "KEN", "TAZ", "ZAN", "BUI", "RWA", "SOM", "DJI",
  "ETH", "ERI", "ANG", "MZM", "ZAM", "ZIM", "MAW", "SAF",
  "NAM", "LES", "BOT", "SWA", "MAG", "COM", "MAS", "SEY",
  "MOR", "ALG", "TUN", "LIB", "SUD", "SSD"
)
MidEa <- c(
  "IRN", "TUR", "IRQ", "EGY", "SYR", "LEB", "JOR", "ISR",
  "SAU", "YAR", "YEM", "YPR", "KUW", "BAH", "QAT", "UAE",
  "OMA"
)
Oceania <- c(
  "AUL", "PNG", "NEW", "VAN", "SOL", "KIR", "TUV", "FIJ",
  "TON", "NAU", "MSI", "PAL", "FSM", "WSM"
)
CenAs <- c("AFG", "TKM", "TAJ", "KYR", "UZB", "KZK", "AZE")
EaAs <- c("CHN", "MON", "TAW", "KOR", "PRK", "ROK", "JPN")
SAs <- c("IND", "BHU", "PAK", "BNG", "MYA", "SRI", "MAD", "NEP")
SeAs <- c(
  "THI", "CAM", "LAO", "DRV", "RVN", "MAL", "SIN", "BRU",
  "PHI", "INS", "ETM"
)

# Study window used to subset every dataset in this script (1918-2001).
years <- seq(from = 1918, to = 2001, by = 1)

# Names of the region vectors defined above.
reg_vector <- c(
  "NorAm", "CenAm", "SouAm", "Eu", "Afr", "MidEa",
  "Oceania", "CenAs", "EaAs", "SAs", "SeAs"
)

## Mutate
# Build a state-abbreviation -> region lookup from the region vectors named in
# `reg_vector`. `mget()` fetches the vectors by name (no eval/parse needed).
# `match()` returns the first hit, so if a code were listed in more than one
# region, the region that comes first in `reg_vector` wins.
region_members <- mget(reg_vector, inherits = TRUE)
region_lookup <- rep(names(region_members), times = lengths(region_members))
names(region_lookup) <- unlist(region_members, use.names = FALSE)

NMC_5_0 <- NMC_5_0 %>%
  # Create region variable (NA for states that appear in no region vector;
  # those rows are ranked together as one NA "region" below).
  mutate(region = unname(region_lookup[match(stateabb, names(region_lookup))])) %>%

  # Create regional power and military expenditure ranks
  # (rank 1 = largest value within the region-year).
  group_by(region, year) %>%
  mutate(
    reg_power_rank = dense_rank(desc(cinc)),
    # 6 - ntile(): the top-ranked state(s) in a region-year always get 5.
    # NOTE(review): the world quintile below uses ntile(-rank) instead; the two
    # forms differ when a group has fewer than 5 members or does not divide
    # evenly into 5 buckets -- confirm this asymmetry is intended, since
    # `diff_in_quint` subtracts one from the other.
    reg_power_quintile = 6 - dplyr::ntile(reg_power_rank, n = 5),

    # Create military expenditure variable
    # NOTE(review): if `milex` encodes missing values with a negative sentinel
    # rather than NA, those rows are ranked as the lowest spenders -- confirm.
    milex_rank = dense_rank(desc(milex)),
    milex_quintile = dplyr::ntile(-milex_rank, n = 5)
  ) %>%
  ungroup() %>%

  # World power rank (within year, across all states)
  group_by(year) %>%
  mutate(
    world_power_rank = dense_rank(desc(cinc)),
    world_power_quintile = dplyr::ntile(-world_power_rank, n = 5),
    world_power_decile = dplyr::ntile(-world_power_rank, n = 10),

    # Differences between world and regional standing
    diff_in_quint = world_power_quintile - reg_power_quintile,
    diff_in_rank = world_power_rank - reg_power_rank
  ) %>%
  # Drop the year grouping so the saved table is a plain tibble; otherwise any
  # later mutate()/summarise() on it would silently operate per year.
  ungroup() %>%

  # Subset years (done after ranking; ranks are computed within year, so the
  # order does not affect them)
  filter(year %in% years) %>%

  # Variables to match on
  rename(country_text = stateabb, country_code = ccode)

#View(filter(NMC_5_0, between(year, 1940, 1950), region == "SAs"))

saveRDS(NMC_5_0, "powers.RDS")


############################
######### VDem Data ######### 
############################

# V-Dem variables kept for the analysis:
#   v2lgfemleg  -> fem_house        (female lower house)
#   v2fsuffrage -> female_suffrage  (female suffrage)
#   e_peaveduc                      (avg years of education, adults above 15)
#   v2clgencl                       (only used for the missingness check below)
#
# The Stata file is read once, restricted to the needed columns via
# `col_select`, instead of being loaded a second time in full just to pull
# out two columns.
vdem_raw <- read_dta(
  "Datasets/vdem_stata.dta",
  col_select = c(country_text_id, COWcode, v2lgfemleg, year, v2fsuffrage,
                 e_peaveduc, v2clgencl)
) %>%
  filter(year %in% years)

# Analysis subset: same columns, order and names as before.
vdem <- vdem_raw %>%
  dplyr::select(-v2clgencl) %>%
  rename(fem_house = v2lgfemleg, female_suffrage = v2fsuffrage, country_text = country_text_id)

# Two-column table used only to inspect missingness of v2clgencl.
vdem_wcivlib <- vdem_raw %>%
  dplyr::select(v2clgencl, year)

# Missing-value counts (trailing numbers are the values recorded by the
# original author). The first count covers both columns of `vdem_wcivlib`.
sum(is.na(vdem_wcivlib)) #632

sum(is.na(vdem$female_suffrage)) #44

saveRDS(vdem, "vdem_subset.RDS")

#####################################
######### Conscription Data ######### 
#####################################

# Conscription: keep year, the recruitment variable and the two matching keys
# (renamed while selecting), then restrict to the study years.
conscription <- read_csv("Datasets/Conscription data.csv") %>%
  dplyr::select(
    year,
    recruit,
    country_text = cabbr,
    country_code = ccode
  ) %>%
  filter(year %in% years)

saveRDS(object = conscription, file = "conscription.RDS")

#####################################
######### Dyad Data ######### 
#####################################
# # colony dyad
# dyad <- read_dta("Datasets/dyads.dta")
# dyad<-dyad %>% dplyr::select(iso_o, iso_d, colony) %>% 
#   rename(state1 = iso_o, state2 = iso_d) %>%
# 
# saveRDS(dyad, "colony_dyad.RDS")

#####################################
######### Geo Data ##################
#####################################
# geo <- read_dta("Datasets/geo.dta")
# geo <- geo %>% dplyr::select (iso3, colonizer1,colonizer2, colonizer3, colonizer4, 
#                        short_colonizer1,short_colonizer2,short_colonizer3) %>% 
#   rename(country_text = iso3)
# 
# saveRDS(geo, "colony.RDS")

#####################################
######### CHISOLS Data ##############
#####################################

# chisols <- read_csv("Datasets/Changes in source of leadership_year/CHISOLSstyr4_0.csv") %>%
#   dplyr::select (ccode, affiliation, year) %>% filter (year %in% years) #have to merge by country code
#  
# saveRDS(chisols, "chisols.RDS")

#####################################
######### leadership Data ###########
#####################################

# Leader-level variables; the matching keys are renamed while selecting so the
# column order stays the same as the selection order, then the table is
# restricted to the study years.
leadership <- read_dta("Datasets/Leadership.dta") %>%
  dplyr::select(
    country_code = ccode,
    leveledu,
    milservice,
    combat,
    miledu,
    warloss,
    warwin,
    year,
    country_text = idacr,
    leadername
  ) %>%
  filter(year %in% years)

saveRDS(object = leadership, file = "leadership.RDS")

###################################################
######### Autocracies of the World Data ###########
###################################################

# Regime data: keep year, the two matching keys (renamed while selecting),
# `lindex` and `regime_r`, then restrict to the study years.
regime <- read_excel("Datasets/Autocracies of the World Dataset/Data_Set.xls") %>%
  dplyr::select(
    year,
    country_code = ccode,
    country_text = scode,
    lindex,
    regime_r
  ) %>%
  filter(year %in% years)

saveRDS(object = regime, file = "regime.RDS")
